"""
@FileName：4解析html.py
@Author：lmz
@Time：2021/5/21 21:25
Fetch https://so.gushiwen.org/shiwens/default.aspx (and the following pages)
with the requests library, parse the HTML with bs4, and save the results to a
file page by page.
"""
import os
import sys

import requests
from bs4 import BeautifulSoup


# Scrape five listing pages and append every entry found to one text file.
# Each saved entry is three lines: title, author/source line, and body text.
output_path = 'hello/test.txt'
# Create the target directory up front; without it every open() below fails.
os.makedirs(os.path.dirname(output_path), exist_ok=True)

for i in range(5):
    # The first page has its own URL; later pages use a 1-based page number.
    if i == 0:
        url = 'https://so.gushiwen.org/shiwens/default.aspx'
    else:
        url = 'https://www.gushiwen.cn/default_' + str(i + 1) + '.aspx'

    # A timeout keeps an unresponsive server from hanging the script forever.
    resp = requests.get(url, timeout=10)
    bs = BeautifulSoup(resp.text, 'lxml')

    for son in bs.find_all('div', class_='sons'):
        title_tag = son.find('b')
        source_tag = son.find('p', class_='source')
        body_tag = son.find('div', class_='contson')
        # Skip blocks that lack any of the three expected parts instead of
        # relying on a blanket exception handler.
        if title_tag is None or source_tag is None or body_tag is None:
            continue

        # .string is None when the <b> tag does not hold a single text node.
        name = title_tag.string
        if name is None:
            continue
        author = source_tag.get_text()
        content = body_tag.get_text().strip()

        print(name)
        print(author)
        print(content)

        try:
            # The with-statement closes the file; no explicit close() needed.
            with open(output_path, 'a', encoding='utf-8') as f:
                f.write(name + '\n')
                f.write(author + '\n')
                f.write(content + '\n')
        except OSError as err:
            # Stay best-effort (keep scraping) but report the failure.
            print('Could not write to ' + output_path + ': ' + str(err),
                  file=sys.stderr)



